from funasr import AutoModel
import os

from pydub import AudioSegment

import tools.audio_utils as audio_utils

# Voice activity detection (VAD) model: prefer a locally downloaded copy,
# otherwise fall back to the ModelScope hub identifier so it is fetched.
_local_vad = 'tools/asr/models/speech_fsmn_vad_zh-cn-16k-common-pytorch'
path_vad = _local_vad if os.path.exists(_local_vad) else "iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"

model = AutoModel(model=path_vad, model_revision="v2.0.4")

# Example one-off usage:
# wav_path = "../../data/1_3月1日(1)_(Vocals).mp3"
# res = model.generate(input=wav_path)  # voice activity detection
# print(res)

def vad(input_audio_file, output_folder, speaker_name, mini_length=5):
    """Run voice activity detection on an audio file, export each speech
    segment that is at least ``mini_length`` seconds long, and merge the
    kept segments into ``<speaker_name>_merge.wav`` at 44100 Hz.

    Args:
        input_audio_file: Path of the audio file to analyze.
        output_folder: Directory the split/merged files are written to
            (created if it does not exist).
        speaker_name: Name prefix used for the exported files.
        mini_length: Minimum segment duration in seconds; shorter
            segments are skipped. Defaults to 5.
    """
    # Create the output folder if needed (no error when it already exists).
    os.makedirs(output_folder, exist_ok=True)

    # Voice activity detection via the module-level FunASR model.
    res = model.generate(input=input_audio_file)
    # Each detected segment is a [start_ms, end_ms] pair.
    segments = res[0]['value']

    audio_segments = []
    for i, segment in enumerate(segments):
        start, end = segment
        # Convert milliseconds to seconds.
        start_ = start / 1000
        end_ = end / 1000
        duration = end_ - start_
        if duration < mini_length:
            print(f"语音段{i+1}：{start_}秒到{end_}秒, 时长{duration}秒, 小于{mini_length}秒，跳过")
            continue
        print(f"语音段{i+1}：{start_}秒到{end_}秒, 时长{duration}秒")
        # Export this segment to its own file, then load it back for merging.
        segment_path = audio_utils.split_audio(
            input_audio_file, output_folder, speaker_name, start_, end_)
        audio_segments.append(AudioSegment.from_file(segment_path))

    # Concatenate all kept segments into a single audio stream.
    merged = AudioSegment.empty()
    for j, audio_segment in enumerate(audio_segments):
        print(f"合并音频段{j+1}")
        merged += audio_segment

    # BUG FIX: AudioSegment is immutable — set_frame_rate() returns a NEW
    # segment. The original discarded the return value, so the 44100 Hz
    # resample never took effect; reassign to apply it.
    merged = merged.set_frame_rate(44100)
    merged.export(os.path.join(output_folder, f"{speaker_name}_merge.wav"), format="wav")


# Script entry point: run VAD on the input audio file.
vad("../../data/lanying/huanzou_7db.wav", "../../data/lanying/vad/huanzou_7db", "huanzou2", 0)
# vad("../../data/岚英.WAV", "../../data/岚英/岚英")
# vad("../../data/lanyingjp.WAV", "../../data/lanyingjp/lanyingjp")